This report presents preliminary analyses of Zanzibar’s small-scale fisheries, including gear usage, vessel types, fishing effort, catch rates, and revenue across different districts.
# Gear usage: share of each gear type among the rows of `stats` in a district.
# One row per district x gear holding the count (n), the district total and
# the percentage; districts with a single row overall are dropped.
gear_viz_grouped <- stats |>
  dplyr::count(district, gear) |>
  dplyr::group_by(district) |>
  dplyr::mutate(
    total = sum(n),
    prc = n / total * 100,
    prc_label = paste0(round(prc, 1), "%")
  ) |>
  dplyr::filter(total > 1) |>
  dplyr::arrange(dplyr::desc(prc), .by_group = TRUE)

# Lollipop chart with one facet per district. `reorder_within()` sorts the
# gears by percentage inside each facet; `scale_y_reordered()` removes the
# district suffix it appends to the axis labels.
ggplot(
  gear_viz_grouped,
  aes(
    x = prc,
    y = reorder_within(gear, prc, district),
    color = district
  )
) +
  # Dots, sized by percentage
  geom_point(aes(size = prc), alpha = 0.8) +
  # Stems running from zero to each dot
  geom_segment(
    aes(xend = 0, yend = reorder_within(gear, prc, district)),
    alpha = 0.3,
    size = 0.8
  ) +
  # Percentage printed to the right of each dot
  geom_text(
    aes(label = prc_label),
    hjust = -0.5,
    size = 3,
    show.legend = FALSE
  ) +
  scale_size_continuous(range = c(2, 8)) +
  scale_color_manual(values = district_colors) +
  scale_y_reordered() +
  # 20% headroom on the right so the percentage labels are not clipped
  scale_x_continuous(
    labels = \(x) paste0(x, "%"),
    limits = c(0, max(gear_viz_grouped$prc) * 1.2),
    expand = c(0, 0)
  ) +
  labs(
    title = "Fishing Gear Usage in Zanzibar",
    subtitle = "Gear type distribution by district (percentage of total usage)",
    x = "Gear usage",
    y = "",
    caption = "Source: PESKAS"
  ) +
  theme_modern() +
  facet_wrap(~district, ncol = 2, scales = "free_y") +
  # Size and colour are already explained by the labels and the facet titles
  guides(size = "none", color = "none")
Vessel type
Show code
# Vessel types: share of each vessel type among the rows of `stats` in a
# district. Missing vessel types are counted under "Unknown"; districts with a
# single row overall are dropped.
# NOTE(review): the object keeps the name `gear_viz_grouped` used by the gear
# chunk, so it overwrites that object; the name is kept for compatibility.
gear_viz_grouped <- stats %>%
  dplyr::mutate(
    vessel_type = dplyr::if_else(is.na(vessel_type), "Unknown", vessel_type)
  ) |>
  dplyr::group_by(district, vessel_type) |>
  dplyr::count() |>
  dplyr::group_by(district) |>
  dplyr::mutate(
    total = sum(n),
    prc = n / total * 100
  ) |>
  dplyr::arrange(desc(prc), .by_group = TRUE) |>
  mutate(prc_label = paste0(round(prc, 1), "%")) |>
  dplyr::filter(total > 1)

# Lollipop chart with one facet per district; vessel types are ordered by
# percentage within each facet.
ggplot(
  gear_viz_grouped,
  aes(x = prc, y = reorder_within(vessel_type, prc, district))
) +
  # Add dots, sized by percentage
  geom_point(aes(size = prc, color = district), alpha = 0.8) +
  # Add connecting segments
  geom_segment(
    aes(
      xend = 0,
      yend = reorder_within(vessel_type, prc, district),
      color = district
    ),
    alpha = 0.3,
    size = 0.8
  ) +
  # Add percentage labels
  geom_text(
    aes(label = prc_label, color = district),
    hjust = -0.5,
    size = 3,
    show.legend = FALSE
  ) +
  # Set colour and size scales
  scale_color_manual(values = district_colors) +
  scale_size_continuous(range = c(2, 8)) +
  # Scale y axis with district-specific ordering
  scale_y_reordered() +
  # Set x-axis scale, leaving 20% headroom for the percentage labels
  scale_x_continuous(
    labels = function(x) paste0(x, "%"),
    limits = c(0, max(gear_viz_grouped$prc) * 1.2),
    expand = c(0, 0)
  ) +
  # Add informative labels
  labs(
    title = "Vessel Types in Zanzibar",
    subtitle = "Vessel type distribution by district (percentage of total usage)",
    x = "Vessel type usage",
    y = "",
    caption = "Source: PESKAS"
  ) +
  # Apply the custom theme
  theme_modern() +
  # `legend` and `color` are not theme elements; the legend is switched off
  # through `legend.position`.
  theme(legend.position = "none") +
  # Facet by district
  facet_wrap(~district, ncol = 2, scales = "free_y") +
  # Hide the size and colour legends
  guides(
    size = "none",
    color = "none"
  )
Fishing effort
Show code
fisher_data <- stats

# Per-district effort summary: mean/median number of fishers, mean/median trip
# duration, and the number of rows (`n_trips`) behind each figure.
district_summary <- fisher_data %>%
  dplyr::group_by(district) %>%
  summarize(
    mean_fishers = mean(n_fishers, na.rm = TRUE),
    median_fishers = median(n_fishers, na.rm = TRUE),
    mean_duration = mean(trip_duration, na.rm = TRUE),
    median_duration = median(trip_duration, na.rm = TRUE),
    n_trips = n()
  ) %>%
  ungroup()

# Panel 1: average number of fishers per district, bars ordered by value.
p1 <- ggplot(
  district_summary,
  aes(x = reorder(district, mean_fishers), y = mean_fishers, fill = district)
) +
  geom_col(width = 0.7) +
  # Rounded mean printed just above each bar
  geom_text(
    aes(label = round(mean_fishers, 0), y = mean_fishers + 0.5),
    vjust = 0, size = 3.5
  ) +
  # Sample size written vertically inside the base of each bar
  geom_text(
    aes(label = sprintf("n=%d", n_trips), y = 0.5),
    vjust = 0, hjust = 0, angle = 90, size = 3, color = "white"
  ) +
  labs(
    title = "Average Number of fishers by District",
    subtitle = sprintf(
      "Based on %d fishing trips",
      sum(district_summary$n_trips)
    ),
    x = "",
    # Fixed typo in the axis label ("fihsers")
    y = "Average N. of fishers"
  ) +
  scale_fill_manual(values = district_colors) +
  theme_modern() +
  theme(
    legend.position = "none",
    panel.grid.major.x = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

# Panel 2: average trip duration per district, bars ordered by value.
p2 <- ggplot(
  district_summary,
  aes(x = reorder(district, mean_duration), y = mean_duration, fill = district)
) +
  geom_col(width = 0.7) +
  # Mean duration printed just above each bar
  geom_text(
    aes(label = sprintf("%.1f h", mean_duration), y = mean_duration + 0.5),
    vjust = 0, size = 3.5
  ) +
  # Sample size written vertically inside the base of each bar
  geom_text(
    aes(label = sprintf("n=%d", n_trips), y = 0.5),
    vjust = 0, hjust = 0, angle = 90, size = 3, color = "white"
  ) +
  labs(
    title = "Average Fishing Trip Duration by District",
    subtitle = sprintf(
      "Based on %d fishing trips",
      sum(district_summary$n_trips)
    ),
    x = "",
    y = "Average Trip Duration (hours)"
  ) +
  scale_fill_manual(values = district_colors) +
  theme_modern() +
  theme(
    legend.position = "none",
    panel.grid.major.x = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

# Stack the panels with patchwork; the top panel gets twice the height.
combined_plot <- p1 / p2 + plot_layout(heights = c(2, 1))

# Display the combined plot
combined_plot
Catch
Show code
# Per-district CPUE/RPUE summary, sorted by mean CPUE (highest first), with the
# "north_a" district excluded.
district_summary <- dat |>
  dplyr::select(district, cpue, rpue) |>
  dplyr::mutate(rpue_usd = rpue * 0.00037) |>
  dplyr::group_by(district) |>
  dplyr::summarize(
    mean_cpue = mean(cpue, na.rm = TRUE),
    median_cpue = median(cpue, na.rm = TRUE),
    mean_rpue_usd = mean(rpue_usd, na.rm = TRUE),
    median_rpue_usd = median(rpue_usd, na.rm = TRUE),
    n_samples = dplyr::n()
  ) |>
  dplyr::ungroup() |>
  dplyr::arrange(dplyr::desc(mean_cpue)) |>
  dplyr::filter(district != "north_a")

# District order of the summary, reused as factor levels for the raw data
ordered_districts <- district_summary$district

# Raw observations with `district` as a factor in that order. Districts that
# are not among the levels become NA and are dropped by the filter.
dat_ordered <- dat |>
  dplyr::mutate(district = factor(district, levels = ordered_districts)) |>
  dplyr::filter(district != "north_a")

# Left panel: CPUE distribution per district (violin with a narrow boxplot).
p1 <- ggplot(dat_ordered, aes(x = district, y = cpue, fill = district)) +
  geom_violin(alpha = 0.7, trim = TRUE, scale = "width") +
  geom_boxplot(width = 0.15, alpha = 0.7, outlier.shape = NA) +
  scale_fill_manual(values = district_colors) +
  # Lower limit below zero leaves a strip for the sample-size labels
  scale_y_continuous(limits = c(-0.5, NA)) +
  labs(
    title = "Catch per Unit Effort by District",
    x = "",
    y = "CPUE (kg/fisher/hour)"
  ) +
  theme_modern() +
  # Overrides must come after the theme they modify
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45, hjust = 1, size = 8)
  ) +
  # Sample size under each violin, drawn last so it sits on top
  geom_text(
    data = district_summary,
    aes(x = district, y = -0.3, label = sprintf("n=%d", n_samples)),
    size = 2.5
  )

# Right panel: mean CPUE per district as horizontal bars.
p2 <- ggplot(
  district_summary,
  aes(x = reorder(district, mean_cpue), y = mean_cpue, fill = district)
) +
  geom_col() +
  scale_fill_manual(values = district_colors) +
  # Extra room on the discrete axis
  scale_x_discrete(expand = expansion(add = c(0, 2))) +
  coord_flip() +
  labs(
    title = "Average CPUE by District",
    x = "",
    y = "kg/fisher/hour"
  ) +
  theme_modern() +
  theme(
    legend.position = "none",
    panel.grid.major.y = element_blank()
  ) +
  # Mean value printed past the end of each bar
  geom_text(
    aes(label = sprintf("%.2f", mean_cpue)),
    hjust = -0.3,
    size = 3
  )

# Side-by-side layout; the distribution panel is 1.5x wider
p1 + p2 + patchwork::plot_layout(widths = c(1.5, 1))
Revenue
Show code
# Multiplier applied to `rpue` to express it in USD. It was previously written
# as a literal in both the summary and the plot; it is defined once here so
# the two cannot drift apart.
# NOTE(review): presumably a local-currency (TZS) to USD rate; confirm the
# source and date of the value.
usd_conversion_rate <- 0.00037

# Per-district CPUE/RPUE summary, sorted by mean RPUE in USD (highest first),
# with the "north_a" district excluded.
district_summary <- dat %>%
  dplyr::select(district, cpue, rpue) |>
  dplyr::mutate(rpue_usd = rpue * usd_conversion_rate) |>
  group_by(district) %>%
  summarize(
    mean_cpue = mean(cpue, na.rm = TRUE),
    median_cpue = median(cpue, na.rm = TRUE),
    mean_rpue_usd = mean(rpue_usd, na.rm = TRUE),
    median_rpue_usd = median(rpue_usd, na.rm = TRUE),
    n_samples = n()
  ) %>%
  ungroup() %>%
  # Sort by average RPUE for consistent ordering
  arrange(desc(mean_rpue_usd)) |>
  dplyr::filter(district != "north_a")

# District order of the summary, reused as factor levels for the raw data
ordered_districts <- district_summary$district

# Raw observations with `district` as a factor in that order. Districts that
# are not among the levels become NA and are dropped by the filter.
dat_ordered <- dat %>%
  mutate(district = factor(district, levels = ordered_districts)) |>
  dplyr::filter(district != "north_a")

# Left panel: RPUE (USD) distribution per district.
p1 <- ggplot(
  dat_ordered,
  aes(x = district, y = rpue * usd_conversion_rate, fill = district)
) +
  geom_violin(alpha = 0.7, trim = TRUE, scale = "width") +
  geom_boxplot(width = 0.15, alpha = 0.7, outlier.shape = NA) +
  scale_fill_manual(values = district_colors) +
  labs(
    title = "Revenue per Unit Effort by District",
    x = "",
    y = "RPUE (USD/fisher/hour)"
  ) +
  # Apply the custom theme
  theme_modern() +
  # Add modifications AFTER applying the theme
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45, hjust = 1, size = 8)
  ) +
  # Lower limit below zero makes room for the sample-size labels
  scale_y_continuous(limits = c(-0.5, NA), n.breaks = 10) +
  # Add sample size labels below zero
  geom_text(
    data = district_summary,
    aes(
      x = district,
      y = -0.3,
      label = sprintf("n=%d", n_samples)
    ),
    size = 2.5
  )

# Right panel: mean RPUE (USD) per district as horizontal bars.
p2 <- ggplot(
  district_summary,
  aes(x = reorder(district, mean_rpue_usd), y = mean_rpue_usd, fill = district)
) +
  geom_col() +
  scale_fill_manual(values = district_colors) +
  # Expand the discrete axis to make room for labels
  scale_x_discrete(expand = expansion(add = c(0, 2))) +
  coord_flip() +
  labs(
    title = "RPUE by District",
    x = "",
    y = "USD/fisher/hour"
  ) +
  # Apply the custom theme
  theme_modern() +
  # Add modifications AFTER applying the theme
  theme(
    legend.position = "none",
    panel.grid.major.y = element_blank()
  ) +
  # Mean value printed past the end of each bar
  geom_text(
    aes(label = sprintf("%.2f", mean_rpue_usd)),
    hjust = -0.3, size = 3
  )

# Side-by-side layout; the distribution panel is 1.5x wider
p1 + p2 + patchwork::plot_layout(widths = c(1.5, 1))
Fish groups
Show code
# Overall catch composition: total catch per fish group across all districts,
# keeping the five largest groups and pooling the remainder as "Others".
taxa_overall <- taxa |>
  dplyr::filter(!is.na(fish_group)) |>
  dplyr::group_by(fish_group) |>
  dplyr::summarize(
    tot_catch_kg = sum(tot_catch_kg),
    .groups = "drop"
  ) |>
  # Share of each group in the grand total
  dplyr::mutate(
    total_catch = sum(tot_catch_kg),
    prc = tot_catch_kg / total_catch * 100
  ) |>
  # Largest first, so the row position is the rank
  dplyr::arrange(dplyr::desc(tot_catch_kg)) |>
  dplyr::mutate(
    rank = dplyr::row_number(),
    fish_category = dplyr::if_else(rank <= 5, fish_group, "Others")
  ) |>
  # Collapse everything labelled "Others" into a single row
  dplyr::group_by(fish_category) |>
  dplyr::summarize(
    tot_catch_kg = sum(tot_catch_kg),
    prc = sum(prc),
    .groups = "drop"
  ) |>
  dplyr::arrange(dplyr::desc(tot_catch_kg)) |>
  # Two-line slice label: category name, then rounded percentage
  dplyr::mutate(label = paste0(fish_category, "\n", round(prc, 1), "%"))

# Pie chart: a single stacked bar drawn in polar coordinates
ggplot(taxa_overall, aes(x = "", y = prc, fill = fish_category)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0) +
  scale_fill_brewer(palette = "Set2") +
  # Labels centred inside their slices
  geom_text(
    aes(label = label),
    position = position_stack(vjust = 0.5),
    size = 3.5
  ) +
  labs(
    title = "Overall Fish Group Composition in Zanzibar",
    subtitle = "Top 5 fish groups across all districts",
    caption = "Source: PESKAS"
  ) +
  # No axes or grid; title, subtitle and caption are styled by hand
  theme_void() +
  theme(
    plot.title = element_text(size = 18, face = "bold", margin = margin(b = 10)),
    plot.subtitle = element_text(
      size = 12, color = "#555555", margin = margin(b = 20)
    ),
    plot.caption = element_markdown(
      size = 8, color = "#888888", margin = margin(t = 15)
    ),
    legend.position = "none",
    plot.margin = margin(20, 25, 20, 25)
  )
Show code
# Catch composition per district: within each district the five rows with the
# largest catch keep their fish group, all remaining rows are pooled under
# "Others".
taxa_processed <- taxa |>
  dplyr::filter(!is.na(fish_group)) |>
  dplyr::group_by(district) |>
  # Largest catch first inside each district, so row position is the rank
  dplyr::arrange(district, dplyr::desc(tot_catch_kg)) |>
  dplyr::mutate(
    fish_category = dplyr::if_else(
      dplyr::row_number() <= 5, fish_group, "Others"
    )
  ) |>
  # One row per district x category; pooled rows are summed
  dplyr::group_by(district, fish_category) |>
  dplyr::summarize(
    tot_catch_kg = sum(tot_catch_kg),
    prc = sum(prc),
    .groups = "drop"
  ) |>
  # Factor whose last level is always "Others"
  dplyr::mutate(
    fish_category = factor(
      fish_category,
      levels = c(setdiff(unique(fish_category), "Others"), "Others")
    )
  ) |>
  # Within a district: named groups by decreasing catch, then "Others"
  dplyr::arrange(
    district,
    dplyr::desc(fish_category != "Others"),
    dplyr::desc(tot_catch_kg)
  )

# Lollipop chart with one facet per district. "Others" is given -Inf as its
# ordering value, so it sorts below every named group on the y axis.
ggplot(
  taxa_processed,
  aes(
    x = prc,
    y = reorder_within(
      fish_category,
      ifelse(fish_category == "Others", -Inf, prc),
      district
    ),
    color = district
  )
) +
  # Dots, sized by percentage
  geom_point(aes(size = prc), alpha = 0.8) +
  # Stems running from zero to each dot
  geom_segment(
    aes(
      xend = 0,
      yend = reorder_within(
        fish_category,
        ifelse(fish_category == "Others", -Inf, prc),
        district
      )
    ),
    alpha = 0.3,
    size = 0.8
  ) +
  # Percentage printed to the right of each dot
  geom_text(
    aes(label = paste0(round(prc, 1), "%")),
    hjust = -0.5,
    size = 3,
    show.legend = FALSE
  ) +
  scale_size_continuous(range = c(2, 8)) +
  scale_color_manual(values = district_colors) +
  scale_y_reordered() +
  # 20% headroom on the right so the percentage labels are not clipped
  scale_x_continuous(
    labels = \(x) paste0(x, "%"),
    limits = c(0, max(taxa_processed$prc) * 1.2),
    expand = c(0, 0)
  ) +
  labs(
    title = "Fish Groups Composition in Zanzibar",
    subtitle = "Top 5 fish groups by district (percentage of total catch)",
    x = "Percentage of catch",
    y = "",
    caption = "Source: PESKAS"
  ) +
  theme_modern() +
  facet_wrap(~district, ncol = 2, scales = "free_y") +
  # Size and colour are already explained by the labels and the facet titles
  guides(size = "none", color = "none")
Source Code
---
title: "Zanzibar small scale fishery"
subtitle: "Preliminary analyses"
author: "Lorenzo Longobardi"
code-fold: true
code-summary: "Show code"
css: style.css
toc_float: true
format:
  html:
    self-contained: true
    toc: true
    toc-depth: 3
    theme: cosmo
    code-tools: true
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
library(sf)
library(dplyr)
library(ggplot2)
library(lubridate)
library(scales)
library(ggtext)
library(forcats)
library(tidyr)
library(magrittr)
library(tidytext)
library(patchwork)
```

## 1. Aim

This report presents preliminary analyses of Zanzibar's small-scale fisheries, including gear usage, vessel types, fishing effort, catch rates, and revenue across different districts.

```{r data-import_constants}
# NOTE(review): `path` is not referenced anywhere below; the imports use file
# names relative to the report's working directory.
path <- "inst/reports/zanizbarSSF_clean__20250414093746_da3c69d__.parquet"

# Input tables used by the chunks below
dat <- rio::import("zanizbarSSF_clean__20250414093746_da3c69d__.parquet") |>
  dplyr::as_tibble()
stats <- rio::import("fishery_stats__20250417160421_07d4dba__.parquet") |>
  dplyr::as_tibble()
taxa <- rio::import("aggregated_taxa__20250427171641_ab5e96e__.parquet") |>
  dplyr::as_tibble()

# Never executed: kept as a record of a per-submission table built from
# `validated_surveys`, with gear and vessel codes recoded to readable labels.
if (FALSE) {
  we <- validated_surveys |>
    dplyr::mutate(
      n_fishers = .data$no_men_fishers + .data$no_women_fishers +
        .data$no_child_fishers
    ) |>
    dplyr::select(
      submission_id, district, trip_duration, gear, vessel_type, n_fishers
    ) |>
    dplyr::group_by(submission_id) |>
    dplyr::summarise(dplyr::across(dplyr::everything(), ~ dplyr::first(.x))) |>
    dplyr::mutate(
      gear = dplyr::case_when(
        .data$gear == "HL" ~ "Hand line",
        .data$gear == "BS" ~ "Beach Seine",
        .data$gear == "CS" ~ "Cast Net",
        .data$gear == "GN" ~ "Gill Net",
        .data$gear == "LL" ~ "Long line",
        .data$gear == "SP" ~ "Spear gun",
        .data$gear == "SR" ~ "Stick Rod",
        .data$gear == "PS" ~ "Purse Seine",
        .data$gear == "RN" ~ "Ring Net",
        .data$gear == "SN" ~ "Shark Net",
        .data$gear == "TR" ~ "Trap",
        TRUE ~ .data$gear # Keep original value if no match
      ),
      vessel_type = dplyr::case_when(
        .data$vessel_type == "1" ~ "Fibre Boat",
        .data$vessel_type == "2" ~ "Dhow",
        .data$vessel_type == "3" ~ "Dugout canoe",
        .data$vessel_type == "4" ~ "Outrigger canoe",
        .data$vessel_type == "5" ~ "Planked Boat",
        .data$vessel_type == "6" ~ "Wooden Boat",
        TRUE ~ .data$vessel_type # Keep original value if no match
      )
    )
}

# Fixed colour per district, shared by every plot in the report
district_colors <- c(
  "urban" = "#1E88E5", # Blue
  "west_a" = "#FF5722", # Orange
  "central" = "#43A047", # Green
  "west_b" = "#FFC107", # Yellow
  "north_a" = "#9C27B0", # Purple
  "wete" = "#F44336", # Red
  "mkoani" = "#009688", # Teal
  "north_b" = "#795548", # Brown
  "chake_chake" = "#607D8B", # Blue Grey
  "south" = "#3F51B5", # Indigo
  "micheweni" = "#E91E63" # Pink
)

# Discrete y scale whose labels drop the "___<within>" suffix added by
# `reorder_within()`. Extra arguments are passed on to `scale_y_discrete()`.
# Defined after `library(tidytext)`, so it masks the tidytext function of the
# same name.
scale_y_reordered <- function(...) {
  scale_y_discrete(labels = function(x) gsub("___.*", "", x), ...)
}

# Order the values of `x` by `by` separately inside each level of `within`.
# Returns a factor over "<x><sep><within>" whose levels are ordered by
# `fun(by)`. Arguments in `...` are accepted but not used. Defined after
# `library(tidytext)`, so it masks the tidytext function of the same name.
reorder_within <- function(x, by, within, fun = mean, sep = "___", ...) {
  new_x <- paste(x, within, sep = sep)
  stats::reorder(new_x, by, FUN = fun)
}

# Report-wide ggplot2 theme built on `theme_minimal()`.
theme_modern <- function() {
  theme_minimal() +
    theme(
      # Text elements
      plot.title = element_text(
        size = 18, face = "bold", margin = margin(b = 10)
      ),
      plot.subtitle = element_text(
        size = 12, color = "#555555", margin = margin(b = 20)
      ),
      plot.caption = element_markdown(
        size = 8, color = "#888888", margin = margin(t = 15)
      ),
      # Axis formatting
      axis.title = element_text(size = 10, color = "#555555", face = "bold"),
      axis.text = element_text(size = 9, color = "#333333"),
      axis.text.x = element_text(angle = 0),
      axis.title.x = element_text(margin = margin(t = 10)),
      axis.title.y = element_text(margin = margin(r = 10)),
      # Grid lines
      panel.grid.minor = element_blank(),
      panel.grid.major.x = element_blank(),
      panel.grid.major.y = element_line(color = "#E0E0E0", size = 0.3),
      # Legend
      legend.position = "top",
      legend.title = element_blank(),
      legend.text = element_text(size = 10),
      legend.background = element_rect(fill = "white", color = NA),
      legend.key.size = unit(1, "cm"),
      # Plot background
      plot.background = element_rect(fill = "white", color = NA),
      panel.background = element_rect(fill = "white", color = NA),
      # Margins
      plot.margin = margin(20, 25, 20, 25)
    )
}
```

### Fishery characterization

#### Gear usage

```{r characterization, fig.height=10, fig.width=10}
# Share of each gear type among the rows of `stats` in a district; districts
# with a single row overall are dropped.
gear_viz_grouped <- stats %>%
  dplyr::group_by(district, gear) |>
  dplyr::count() |>
  dplyr::group_by(district) |>
  dplyr::mutate(
    total = sum(n),
    prc = n / total * 100
  ) |>
  dplyr::arrange(desc(prc), .by_group = TRUE) |>
  mutate(prc_label = paste0(round(prc, 1), "%")) |>
  dplyr::filter(total > 1)

ggplot(gear_viz_grouped, aes(x = prc, y = reorder_within(gear, prc, district))) +
  # Add dots, sized by percentage
  geom_point(aes(size = prc, color = district), alpha = 0.8) +
  # Add connecting segments
  geom_segment(
    aes(xend = 0, yend = reorder_within(gear, prc, district), color = district),
    alpha = 0.3, size = 0.8
  ) +
  # Add percentage labels
  geom_text(
    aes(label = prc_label, color = district),
    hjust = -0.5,
    size = 3,
    show.legend = FALSE
  ) +
  # Set size and colour scales
  scale_size_continuous(range = c(2, 8)) +
  scale_color_manual(values = district_colors) +
  # Scale y axis with district-specific ordering
  scale_y_reordered() +
  # Set x-axis scale
  scale_x_continuous(
    labels = function(x) paste0(x, "%"),
    limits = c(0, max(gear_viz_grouped$prc) * 1.2),
    expand = c(0, 0)
  ) +
  # Add informative labels
  labs(
    title = "Fishing Gear Usage in Zanzibar",
    subtitle = "Gear type distribution by district (percentage of total usage)",
    x = "Gear usage",
    y = "",
    caption = "Source: PESKAS"
  ) +
  # Apply the custom theme
  theme_modern() +
  # Facet by district
  facet_wrap(~district, ncol = 2, scales = "free_y") +
  # Hide the size and colour legends
  guides(
    size = "none",
    color = "none"
  )
```

#### Vessel type

```{r vessels, fig.height=10, fig.width=10}
# Share of each vessel type among the rows of `stats` in a district; missing
# vessel types are counted under "Unknown".
# NOTE(review): reuses the name `gear_viz_grouped` from the gear chunk.
gear_viz_grouped <- stats %>%
  dplyr::mutate(
    vessel_type = dplyr::if_else(is.na(vessel_type), "Unknown", vessel_type)
  ) |>
  dplyr::group_by(district, vessel_type) |>
  dplyr::count() |>
  dplyr::group_by(district) |>
  dplyr::mutate(
    total = sum(n),
    prc = n / total * 100
  ) |>
  dplyr::arrange(desc(prc), .by_group = TRUE) |>
  mutate(prc_label = paste0(round(prc, 1), "%")) |>
  dplyr::filter(total > 1)

ggplot(
  gear_viz_grouped,
  aes(x = prc, y = reorder_within(vessel_type, prc, district))
) +
  # Add dots, sized by percentage
  geom_point(aes(size = prc, color = district), alpha = 0.8) +
  # Add connecting segments
  geom_segment(
    aes(
      xend = 0,
      yend = reorder_within(vessel_type, prc, district),
      color = district
    ),
    alpha = 0.3, size = 0.8
  ) +
  # Add percentage labels
  geom_text(
    aes(label = prc_label, color = district),
    hjust = -0.5,
    size = 3,
    show.legend = FALSE
  ) +
  # Set colour and size scales
  scale_color_manual(values = district_colors) +
  scale_size_continuous(range = c(2, 8)) +
  # Scale y axis with district-specific ordering
  scale_y_reordered() +
  # Set x-axis scale
  scale_x_continuous(
    labels = function(x) paste0(x, "%"),
    limits = c(0, max(gear_viz_grouped$prc) * 1.2),
    expand = c(0, 0)
  ) +
  # Add informative labels
  labs(
    title = "Vessel Types in Zanzibar",
    subtitle = "Vessel type distribution by district (percentage of total usage)",
    x = "Vessel type usage",
    y = "",
    caption = "Source: PESKAS"
  ) +
  # Apply the custom theme
  theme_modern() +
  # `legend` and `color` are not theme elements; the legend is switched off
  # through `legend.position`.
  theme(legend.position = "none") +
  # Facet by district
  facet_wrap(~district, ncol = 2, scales = "free_y") +
  # Hide the size and colour legends
  guides(
    size = "none",
    color = "none"
  )
```

#### Fishing effort

```{r duration, fig.height=10, fig.width=10}
fisher_data <- stats

# Calculate summary statistics by district
district_summary <- fisher_data %>%
  dplyr::group_by(district) %>%
  summarize(
    mean_fishers = mean(n_fishers, na.rm = TRUE),
    median_fishers = median(n_fishers, na.rm = TRUE),
    mean_duration = mean(trip_duration, na.rm = TRUE),
    median_duration = median(trip_duration, na.rm = TRUE),
    n_trips = n()
  ) %>%
  ungroup()

# Panel 1: average number of fishers per district
p1 <- ggplot(
  district_summary,
  aes(x = reorder(district, mean_fishers), y = mean_fishers, fill = district)
) +
  geom_col(width = 0.7) +
  geom_text(
    aes(label = round(mean_fishers, 0), y = mean_fishers + 0.5),
    vjust = 0, size = 3.5
  ) +
  geom_text(
    aes(label = sprintf("n=%d", n_trips), y = 0.5),
    vjust = 0, hjust = 0, angle = 90, size = 3, color = "white"
  ) +
  labs(
    title = "Average Number of fishers by District",
    subtitle = sprintf(
      "Based on %d fishing trips",
      sum(district_summary$n_trips)
    ),
    x = "",
    # Fixed typo in the axis label ("fihsers")
    y = "Average N. of fishers"
  ) +
  scale_fill_manual(values = district_colors) +
  theme_modern() +
  theme(
    legend.position = "none",
    panel.grid.major.x = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

# Panel 2: average trip duration per district
p2 <- ggplot(
  district_summary,
  aes(x = reorder(district, mean_duration), y = mean_duration, fill = district)
) +
  geom_col(width = 0.7) +
  geom_text(
    aes(label = sprintf("%.1f h", mean_duration), y = mean_duration + 0.5),
    vjust = 0, size = 3.5
  ) +
  geom_text(
    aes(label = sprintf("n=%d", n_trips), y = 0.5),
    vjust = 0, hjust = 0, angle = 90, size = 3, color = "white"
  ) +
  labs(
    title = "Average Fishing Trip Duration by District",
    subtitle = sprintf(
      "Based on %d fishing trips",
      sum(district_summary$n_trips)
    ),
    x = "",
    y = "Average Trip Duration (hours)"
  ) +
  scale_fill_manual(values = district_colors) +
  theme_modern() +
  theme(
    legend.position = "none",
    panel.grid.major.x = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

# Stack the panels; the top panel gets twice the height
combined_plot <- p1 / p2 + plot_layout(heights = c(2, 1))

# Display the combined plot
combined_plot
```

### Catch

```{r cpue, fig.height=6, fig.width=10}
district_summary <- dat %>%
  dplyr::select(district, cpue, rpue) |>
  dplyr::mutate(rpue_usd = rpue * 0.00037) |>
  group_by(district) %>%
  summarize(
    mean_cpue = mean(cpue, na.rm = TRUE),
    median_cpue = median(cpue, na.rm = TRUE),
    mean_rpue_usd = mean(rpue_usd, na.rm = TRUE),
    median_rpue_usd = median(rpue_usd, na.rm = TRUE),
    n_samples = n()
  ) %>%
  ungroup() %>%
  # Sort by average CPUE for consistent ordering
  arrange(desc(mean_cpue)) |>
  dplyr::filter(!district == "north_a")

# Create the ordered district factor to ensure consistent ordering
ordered_districts <- district_summary$district

# Make the ordered version of the data
dat_ordered <- dat %>%
  mutate(district = factor(district, levels = ordered_districts)) |>
  dplyr::filter(!district == "north_a")

# Distribution panel: violin with a narrow boxplot
p1 <- ggplot(dat_ordered, aes(x = district, y = cpue, fill = district)) +
  geom_violin(alpha = 0.7, trim = TRUE, scale = "width") +
  geom_boxplot(width = 0.15, alpha = 0.7, outlier.shape = NA) +
  scale_fill_manual(values = district_colors) +
  labs(
    title = "Catch per Unit Effort by District",
    x = "",
    y = "CPUE (kg/fisher/hour)"
  ) +
  # Apply the custom theme
  theme_modern() +
  # Add modifications AFTER applying the theme
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45, hjust = 1, size = 8)
  ) +
  # Adjust y-axis limits to make room for labels at the bottom
  scale_y_continuous(limits = c(-0.5, NA)) +
  # Add sample size labels with adjusted y-position
  geom_text(
    data = district_summary,
    aes(
      x = district,
      y = -0.3, # Position below 0
      label = sprintf("n=%d", n_samples)
    ),
    size = 2.5
  )

# Bar chart of the district means
p2 <- ggplot(
  district_summary,
  aes(x = reorder(district, mean_cpue), y = mean_cpue, fill = district)
) +
  geom_col() +
  scale_fill_manual(values = district_colors) +
  # Expand the discrete axis to make room for labels
  scale_x_discrete(expand = expansion(add = c(0, 2))) +
  coord_flip() +
  labs(
    title = "Average CPUE by District",
    x = "",
    y = "kg/fisher/hour"
  ) +
  # Apply the custom theme
  theme_modern() +
  # Add modifications AFTER applying the theme
  theme(
    legend.position = "none",
    panel.grid.major.y = element_blank()
  ) +
  # Add text with adjusted position
  geom_text(
    aes(label = sprintf("%.2f", mean_cpue)),
    hjust = -0.3, size = 3
  )

p1 + p2 + patchwork::plot_layout(widths = c(1.5, 1))
```

### Revenue

```{r rpue, fig.height=6, fig.width=10}
district_summary <- dat %>%
  dplyr::select(district, cpue, rpue) |>
  dplyr::mutate(rpue_usd = rpue * 0.00037) |>
  group_by(district) %>%
  summarize(
    mean_cpue = mean(cpue, na.rm = TRUE),
    median_cpue = median(cpue, na.rm = TRUE),
    mean_rpue_usd = mean(rpue_usd, na.rm = TRUE),
    median_rpue_usd = median(rpue_usd, na.rm = TRUE),
    n_samples = n()
  ) %>%
  ungroup() %>%
  # Sort by average RPUE for consistent ordering
  arrange(desc(mean_rpue_usd)) |>
  dplyr::filter(!district == "north_a")

# Create the ordered district factor to ensure consistent ordering
ordered_districts <- district_summary$district

# Make the ordered version of the data
dat_ordered <- dat %>%
  mutate(district = factor(district, levels = ordered_districts)) |>
  dplyr::filter(!district == "north_a")

# Distribution panel: violin with a narrow boxplot
p1 <- ggplot(
  dat_ordered,
  aes(x = district, y = rpue * 0.00037, fill = district)
) +
  geom_violin(alpha = 0.7, trim = TRUE, scale = "width") +
  geom_boxplot(width = 0.15, alpha = 0.7, outlier.shape = NA) +
  scale_fill_manual(values = district_colors) +
  labs(
    title = "Revenue per Unit Effort by District",
    x = "",
    y = "RPUE (USD/fisher/hour)"
  ) +
  # Apply the custom theme
  theme_modern() +
  # Add modifications AFTER applying the theme
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45, hjust = 1, size = 8)
  ) +
  # Adjust y-axis limits to make room for labels at the bottom
  scale_y_continuous(limits = c(-0.5, NA), n.breaks = 10) +
  # Add sample size labels with adjusted y-position
  geom_text(
    data = district_summary,
    aes(
      x = district,
      y = -0.3, # Position below 0
      label = sprintf("n=%d", n_samples)
    ),
    size = 2.5
  )

# Bar chart of the district means
p2 <- ggplot(
  district_summary,
  aes(x = reorder(district, mean_rpue_usd), y = mean_rpue_usd, fill = district)
) +
  geom_col() +
  scale_fill_manual(values = district_colors) +
  # Expand the discrete axis to make room for labels
  scale_x_discrete(expand = expansion(add = c(0, 2))) +
  coord_flip() +
  labs(
    title = "RPUE by District",
    x = "",
    y = "USD/fisher/hour"
  ) +
  # Apply the custom theme
  theme_modern() +
  # Add modifications AFTER applying the theme
  theme(
    legend.position = "none",
    panel.grid.major.y = element_blank()
  ) +
  # Add text with adjusted position
  geom_text(
    aes(label = sprintf("%.2f", mean_rpue_usd)),
    hjust = -0.3, size = 3
  )

p1 + p2 + patchwork::plot_layout(widths = c(1.5, 1))
```

### Fish groups

```{r taxa-all, fig.height=7, fig.width=7}
taxa_overall <- taxa %>%
  # Sum catch weight by fish group across all districts
  dplyr::group_by(fish_group) %>%
  dplyr::filter(!is.na(fish_group)) |>
  dplyr::summarize(
    tot_catch_kg = sum(tot_catch_kg),
    .groups = "drop"
  ) %>%
  # Calculate percentage of total catch
  dplyr::mutate(
    total_catch = sum(tot_catch_kg),
    prc = tot_catch_kg / total_catch * 100
  ) %>%
  # Arrange in descending order of catch
  dplyr::arrange(dplyr::desc(tot_catch_kg)) %>%
  # Take top 5 and create "Others" category
  dplyr::mutate(
    rank = dplyr::row_number(),
    fish_category = if_else(rank <= 5, fish_group, "Others")
  ) %>%
  # Group by fish category (combining "Others")
  dplyr::group_by(fish_category) %>%
  dplyr::summarize(
    tot_catch_kg = sum(tot_catch_kg),
    prc = sum(prc),
    .groups = "drop"
  ) %>%
  # Arrange again for consistent ordering
  dplyr::arrange(dplyr::desc(tot_catch_kg)) %>%
  # Create label with percentage for the pie chart
  dplyr::mutate(label = paste0(fish_category, "\n", round(prc, 1), "%"))

# Create a pie chart
ggplot(taxa_overall, aes(x = "", y = prc, fill = fish_category)) +
  geom_bar(stat = "identity", width = 1) +
  coord_polar("y", start = 0) +
  # Use a colorful palette
  scale_fill_brewer(palette = "Set2") +
  # Add labels
  geom_text(
    aes(label = label),
    position = position_stack(vjust = 0.5),
    size = 3.5
  ) +
  # Add informative title
  labs(
    title = "Overall Fish Group Composition in Zanzibar",
    subtitle = "Top 5 fish groups across all districts",
    caption = "Source: PESKAS"
  ) +
  # Remove unnecessary elements
  theme_void() +
  theme(
    plot.title = element_text(size = 18, face = "bold", margin = margin(b = 10)),
    plot.subtitle = element_text(
      size = 12, color = "#555555", margin = margin(b = 20)
    ),
    plot.caption = element_markdown(
      size = 8, color = "#888888", margin = margin(t = 15)
    ),
    legend.position = "none",
    plot.margin = margin(20, 25, 20, 25)
  )
```

```{r taxa-district, fig.height=10, fig.width=10}
taxa_processed <- taxa |>
  dplyr::filter(!is.na(fish_group)) %>%
  dplyr::group_by(district) %>%
  # Arrange in descending order of catch within each district
  dplyr::arrange(district, desc(tot_catch_kg)) %>%
  # Add a rank column within each district
  dplyr::mutate(rank = dplyr::row_number()) %>%
  # Create a new column for the grouped categories
  dplyr::mutate(
    fish_category = dplyr::if_else(rank <= 5, fish_group, "Others")
  ) %>%
  # Remove the rank column as it's no longer needed
  dplyr::select(-rank) %>%
  # Group by both district and the new fish category
  dplyr::group_by(district, fish_category) %>%
  # Summarize to combine all "Others" into one row per district
  dplyr::summarize(
    tot_catch_kg = sum(tot_catch_kg),
    prc = sum(prc),
    .groups = "drop"
  ) %>%
  # Create a factor with "Others" as the last level
  dplyr::mutate(
    fish_category = factor(
      fish_category,
      levels = c(setdiff(unique(fish_category), "Others"), "Others")
    )
  ) %>%
  # Arrange by district, named groups first, then by decreasing catch
  dplyr::arrange(district, desc(fish_category != "Others"), desc(tot_catch_kg))

ggplot(
  taxa_processed,
  aes(
    x = prc,
    # "Others" gets -Inf as ordering value so it sorts below the named groups
    y = reorder_within(
      fish_category,
      ifelse(fish_category == "Others", -Inf, prc),
      district
    )
  )
) +
  # Add dots, sized by percentage
  geom_point(aes(size = prc, color = district), alpha = 0.8) +
  # Add connecting segments
  geom_segment(
    aes(
      xend = 0,
      yend = reorder_within(
        fish_category,
        ifelse(fish_category == "Others", -Inf, prc),
        district
      ),
      color = district
    ),
    alpha = 0.3, size = 0.8
  ) +
  # Add percentage labels
  geom_text(
    aes(label = paste0(round(prc, 1), "%"), color = district),
    hjust = -0.5,
    size = 3,
    show.legend = FALSE
  ) +
  # Set size and colour scales
  scale_size_continuous(range = c(2, 8)) +
  scale_color_manual(values = district_colors) +
  # Scale y axis with district-specific ordering
  scale_y_reordered() +
  # Set x-axis scale
  scale_x_continuous(
    labels = function(x) paste0(x, "%"),
    limits = c(0, max(taxa_processed$prc) * 1.2),
    expand = c(0, 0)
  ) +
  # Add informative labels
  labs(
    title = "Fish Groups Composition in Zanzibar",
    subtitle = "Top 5 fish groups by district (percentage of total catch)",
    x = "Percentage of catch",
    y = "",
    caption = "Source: PESKAS"
  ) +
  # Apply the custom theme
  theme_modern() +
  # Facet by district
  facet_wrap(~district, ncol = 2, scales = "free_y") +
  # Hide the size and colour legends
  guides(
    size = "none",
    color = "none"
  )
```